import librosa
import simpleaudio as sa
import soundfile as sf
import torch
from demucs import pretrained
from demucs.apply import apply_model

# Separate the vocal stem from a recording with Demucs, save it as
# "vocals.wav" and play it back.

# Input recording (raw string so the Windows backslashes stay literal).
audio_path = r'C:\Users\victus\Documents\WeChat Files\wxid_zckpz7oz0who22\FileStorage\File\2025-03\实验音频.wav'

# Load the separator first: it dictates the sample rate and channel count
# the input must have.
model = pretrained.get_model('htdemucs')
model.eval()

# mono=False keeps the file's channel layout; librosa then returns a 1-D
# array for mono files and a (channels, samples) array otherwise.
y, sr = librosa.load(audio_path, sr=model.samplerate, mono=False)

# apply_model needs a float torch tensor shaped (batch, channels, samples)
# with exactly model.audio_channels channels.
wav = torch.from_numpy(y).float()
if wav.dim() == 1:
    wav = wav[None]
if wav.shape[0] == 1:
    # Duplicate a mono signal onto every channel the model expects.
    wav = wav.repeat(model.audio_channels, 1)
elif wav.shape[0] > model.audio_channels:
    # Drop surplus channels rather than failing inside the model.
    wav = wav[:model.audio_channels]

# Separate the stems; the result is (batch, stems, channels, samples).
sources = apply_model(model, wav[None], device='cpu')

# Look the vocal stem up by name: the stem order is defined by the model,
# and index 0 is not the vocals for htdemucs.
vocal_index = model.sources.index('vocals')
# soundfile expects (frames, channels), hence the transpose.
vocals = sources[0, vocal_index].cpu().numpy().T

# Save the vocal track. 16-bit PCM is requested explicitly because the
# playback step below reads the file through the stdlib wave module, which
# only understands PCM data.
sf.write("vocals.wav", vocals, sr, subtype='PCM_16')
print("人声音频已保存为 vocals.wav")

# Play the saved vocal track and block until playback has finished.
wave_obj = sa.WaveObject.from_wave_file("vocals.wav")
play_obj = wave_obj.play()
play_obj.wait_done()